<?php
#
# dmBridge: a data access framework for CONTENTdm(R)
#
# Copyright © 2009, 2010, 2011 Board of Regents of the Nevada System of Higher
# Education, on behalf of the University of Nevada, Las Vegas
#

/**
 * A collection of static string validation & manipulation methods.
 *
 * @author Alex Dolski <alex.dolski@unlv.edu>
 * @license http://www.opensource.org/licenses/mit-license.php
 */
abstract class DMString {

	/**
	 * Converts a camelCase identifier to lower-case underscore_notation.
	 * An underscore is inserted before every upper-case ASCII letter that
	 * directly follows a lower-case ASCII letter, and the result is then
	 * lower-cased.
	 *
	 * @param string $str
	 * @return string
	 * @since 0.4
	 */
	public static function camelCaseToUnderscore($str) {
		return strtolower(preg_replace('/(?<=[a-z])([A-Z])/', '_$1', $str));
	}

	/**
	 * Cleans a string of invalid UTF-8 characters using a strategy informed
	 * by http://www.phpwact.org/php/i18n/charsets. This <strong>may</strong>
	 * strip out <strong>a lot more</strong> than just invalid UTF-8 characters;
	 * see the inline comments.
	 *
	 * @param string $str
	 * @return string Cleaned string
	 * @since 0.9
	 */
	public static function clean($str) {
		// Plan A: let iconv drop the illegal sequences. Some iconv
		// implementations return false or an empty string here, so the result
		// is only trusted when it is a non-empty string. The "@" silences the
		// notice iconv raises on illegal input.
		if (function_exists('iconv')) {
			$converted = @iconv("UTF-8", "UTF-8//IGNORE", $str);
			if (is_string($converted) && strlen($converted) > 0) {
				return $converted;
			}
		}
		// Plan B: strip out every byte below 32 and above 127. This removes
		// all non-ASCII characters AND control characters such as tabs and
		// newlines.
		return filter_var(
			$str,
			FILTER_UNSAFE_RAW,
			FILTER_FLAG_STRIP_LOW | FILTER_FLAG_STRIP_HIGH);
	}

	/**
	 * Strips all non-alphanumerics (anything outside a-z, A-Z, 0-9) from a
	 * string, optionally sparing any characters in the $allowed array. This
	 * should be an indexed array with one character per element.
	 * This method inspired by and partially stolen from CakePHP's paranoid().
	 *
	 * @param string $str
	 * @param array $allowed Characters to keep in addition to alphanumerics
	 * @return string
	 * @author CakePHP Foundation
	 * @since 0.1
	 */
	public static function paranoid($str, $allowed = array()) {
		$allow = '';
		foreach ((array) $allowed as $value) {
			// preg_quote() escapes only characters that are special to PCRE.
			// Blindly prefixing a backslash would turn an allowed "s", "d",
			// "w", "n", "t", ... into a character class or escape sequence
			// (\s, \d, \w, \n, \t) and let unintended characters through.
			$allow .= preg_quote($value, '/');
		}
		return preg_replace("/[^{$allow}a-zA-Z0-9]/", '', $str);
	}

	/**
	 * "Highlights" all occurrences of $term in $str by enclosing them in
	 * span elements with class $class. Matching is case-insensitive and the
	 * matched text keeps the casing it has in $str. Neither $term, $str nor
	 * $class is HTML-escaped by this method.
	 *
	 * @param string $term Term to match
	 * @param string $str String in which to search
	 * @param string $class Value of the span's class attribute
	 * @return string
	 */
	public static function highlight($term, $str,
			$class = "dmHighlightedTerm") {
		$term = (string) $term;
		$str = (string) $str;
		$term_length = strlen($term);
		if ($term_length == 0) {
			// Nothing to look for
			return $str;
		}

		$open_tag = '<span class="' . $class . '">';
		$result = '';
		$offset = 0;
		// Walk the non-overlapping matches from left to right, copying the
		// text between them verbatim and wrapping the text that actually
		// matched (not $term, whose casing may differ).
		while (($pos = stripos($str, $term, $offset)) !== false) {
			$result .= substr($str, $offset, $pos - $offset)
				. $open_tag . substr($str, $pos, $term_length) . '</span>';
			$offset = $pos + $term_length;
		}
		return $result . substr($str, $offset);
	}

	/**
	 * Searches through text strings and adds HTML anchor tags around any words
	 * beginning with http://, https://, ftp://, or gopher://. Words are
	 * delimited by single spaces only.
	 *
	 * When $dxml is supplied, each word is first parsed as an XML chunk, so
	 * that markup and entities already present in $str are honoured; a word
	 * that is not well-formed XML on its own (for example a URL containing a
	 * raw ampersand) is appended as literal text instead of being dropped.
	 * Every word in the fragment, including the last, is followed by a space.
	 *
	 * @param string $str
	 * @param DOMDocument $dxml
	 * @return string|DOMDocumentFragment If $dxml is null, a string; otherwise
	 * a DOMDocumentFragment
	 * @since 0.1
	 */
	public static function hyperlink($str, DOMDocument $dxml = null) {
		$words = explode(' ', $str);

		if (!$dxml) {
			foreach ($words as $i => $word) {
				if (self::startsWithLinkProtocol($word)) {
					$words[$i] = sprintf('<a href="%s">%s</a>', $word, $word);
				}
			}
			return implode(" ", $words);
		}

		$fragment = $dxml->createDocumentFragment();
		// Collect parser errors internally instead of emitting a warning for
		// every word that fails to parse; failures are handled below.
		$previous = libxml_use_internal_errors(true);
		foreach ($words as $word) {
			if (self::startsWithLinkProtocol($word)) {
				$xml = sprintf('<a href="%s">%s</a>', $word, $word) . " ";
				if (!$fragment->appendXML($xml)) {
					// Not well-formed: build the link from DOM nodes so that
					// the DOM takes care of the escaping.
					$anchor = $dxml->createElement('a');
					$anchor->setAttribute('href', $word);
					$anchor->appendChild($dxml->createTextNode($word));
					$fragment->appendChild($anchor);
					$fragment->appendChild($dxml->createTextNode(" "));
				}
			} else if (!$fragment->appendXML($word . " ")) {
				$fragment->appendChild($dxml->createTextNode($word . " "));
			}
		}
		if (!$previous) {
			// Only discard the error buffer if the caller was not using it
			libxml_clear_errors();
		}
		libxml_use_internal_errors($previous);
		return $fragment;
	}

	/**
	 * Tells whether a word begins with one of the URL schemes that
	 * hyperlink() turns into anchors. The comparison is case-sensitive.
	 *
	 * @param string $word
	 * @return bool
	 */
	private static function startsWithLinkProtocol($word) {
		foreach (array("http", "https", "ftp", "gopher") as $proto) {
			$prefix = $proto . "://";
			if (strncmp($word, $prefix, strlen($prefix)) === 0) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Truncates a text string to the desired word length, appending an
	 * ellipsis (...) at the end. Words are delimited by single spaces. The
	 * string is returned unchanged if it has no more than $max_words words,
	 * or if $max_words is not greater than zero.
	 *
	 * @param string $str
	 * @param int $max_words
	 * @return string
	 * @since 0.1
	 */
	public static function truncate($str, $max_words) {
		$words = explode(' ', $str);
		if ($max_words > 0 && count($words) > $max_words) {
			$kept = array_slice($words, 0, (int) $max_words);
			return implode(' ', $kept) . '...';
		}
		return $str;
	}

	/**
	 * @param string $str
	 * @return bool True if $str is a valid UTF-8 string; false if not.
	 * @since 0.9
	 */
	public static function isUTF8($str) {
		$str = (string) $str;
		// Plan A: iconv returns the input unchanged only if every byte
		// sequence in it is legal. The "@" silences the notice iconv raises
		// on illegal input.
		if (function_exists('iconv')) {
			return ($str === @iconv("UTF-8", "UTF-8", $str));
		}
		// Plan B: mbstring
		if (function_exists('mb_check_encoding')) {
			return mb_check_encoding($str, "UTF-8");
		}
		// Plan C: UTF-8 to Code Point Array Converter.
		// NOTE(review): utf8toUnicode() is assumed to return false on invalid
		// input and an array of code points otherwise -- confirm against the
		// bundled library. Comparing against false keeps the documented
		// Boolean return type (and treats an empty string as valid).
		require_once(dirname(__FILE__) . "/../libraries/php-utf8/utf8.inc.php");
		return utf8toUnicode($str) !== false;
	}

	/**
	 * Adapted from EmailAddressValidator::check_domain_portion() by Dave Child.
	 * Supposedly RFC2822-compliant.
	 *
	 * Accepts either a dotted-quad IPv4 address (optionally enclosed in
	 * square brackets) or a domain name made of at least two dot-separated
	 * labels.
	 *
	 * @param string $str
	 * @return bool
	 * @author Dave Child
	 * @see http://code.google.com/p/php-email-address-validation/
	 * @since 0.1
	 */
	public static function isValidHostname($str) {
		// Total domain can only be from 1 to 255 characters, inclusive
		$length = strlen($str);
		if ($length < 1 || $length > 255) {
			return false;
		}

		// Check if domain is IP, possibly enclosed in square brackets.
		// The D modifier makes "$" match only at the very end of the subject;
		// without it a trailing newline would be silently accepted.
		$octet = '(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])';
		$ipv4 = $octet . '(\.' . $octet . '){3}';
		if (preg_match('/^' . $ipv4 . '$/D', $str)
				|| preg_match('/^\[' . $ipv4 . '\]$/D', $str)) {
			return true;
		}

		$labels = explode('.', $str);
		if (count($labels) < 2) {
			return false; // Not enough parts to domain
		}
		foreach ($labels as $label) {
			// Each portion must be between 1 and 63 characters, inclusive
			$label_length = strlen($label);
			if ($label_length < 1 || $label_length > 63) {
				return false;
			}
			// Alphanumerics and hyphens only; no leading or trailing hyphen
			if (!preg_match('/^(([A-Za-z0-9][A-Za-z0-9-]{0,61}[A-Za-z0-9])|'
					. '([A-Za-z0-9]+))$/D', $label)) {
				return false;
			}
		}
		return true;
	}

	/**
	 * @param string $str
	 * @return bool True if $str passes PHP's FILTER_VALIDATE_IP filter
	 * @since 0.1
	 */
	public static function isValidIPAddress($str) {
		// filter_var() returns the filtered string on success, so compare
		// against false to honour the documented Boolean return type.
		return filter_var($str, FILTER_VALIDATE_IP) !== false;
	}

	/**
	 * Checks that a pathname consists solely of alphanumerics, dots,
	 * underscores, hyphens, slashes and backslashes.
	 *
	 * @param string $str
	 * @param bool $check_exists If true, the path must also exist on disk
	 * @return bool
	 * @since 0.1
	 */
	public static function isValidPathname($str, $check_exists = false) {
		$legal_chars = array('.', '_', '-', '/', '\\');
		// Strict comparison: with "==", numeric-looking strings are compared
		// as numbers, so e.g. " 1" would equal its stripped form "1".
		if ((string) $str !== self::paranoid($str, $legal_chars)) {
			return false;
		}
		if ($check_exists) {
			return file_exists($str);
		}
		return true;
	}

	/**
	 * Converts an underscore_separated string to camelCase. The input is
	 * lower-cased first, and each underscore-delimited word is trimmed of
	 * surrounding whitespace.
	 *
	 * @param string $str
	 * @param bool $capitalize_first_char Whether to also capitalize the first
	 * word
	 * @return string CamelCase string
	 * @since 0.4
	 */
	public static function underscoreToCamelCase(
			$str, $capitalize_first_char = false) {
		$result = '';
		foreach (explode('_', strtolower($str)) as $i => $word) {
			$word = trim($word);
			if ($i == 0 && !$capitalize_first_char) {
				$result .= $word;
			} else {
				$result .= ucfirst($word);
			}
		}
		return $result;
	}

	/**
	 * Strips all HTML tags from a string and HTML-escapes it (including
	 * both single and double quotes).
	 *
	 * @param string $str String to escape
	 * @param string $encoding An encoding accepted by PHP's
	 * htmlspecialchars() function
	 * @return string
	 * @since 0.3
	 */
	public static function websafe($str, $encoding = "UTF-8") {
		return htmlspecialchars(strip_tags($str), ENT_QUOTES, $encoding);
	}

	/**
	 * Replaces the five characters that are special in XML (&amp;, &quot;,
	 * &apos;, &lt;, &gt;) with their predefined entities.
	 *
	 * @param string $str
	 * @return string
	 * @since 0.1
	 */
	public static function xmlentities($str) {
		// The ampersand must come first so that the ampersands introduced by
		// the other replacements are not escaped a second time.
		return str_replace(array('&', '"', "'", '<', '>'),
			array('&amp;', '&quot;', '&apos;', '&lt;', '&gt;'),
			$str);
	}

}
